{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "91042c76-b05a-43f6-a3ee-7fb415c011b2",
   "metadata": {},
   "source": [
    "执行命令需要在同一行命令中，先source环境名(base,python27,python36,python37,python38,python39,python310,cube-studio)才能pip安装到指定环境，如果不知道有哪些虚拟环境，可以conda info --envs查看"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5834674a-ac83-4c20-986f-602af81fcd65",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (2.0.1)\n",
      "Requirement already satisfied: numpy>=1.20.3 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (1.24.3)\n",
      "Requirement already satisfied: pytz>=2020.1 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2023.3)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: six>=1.5 in /root/miniconda3/envs/cube-studio/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\u001B[33m\n",
      "\u001B[0m"
     ]
    }
   ],
   "source": [
    "!source activate cube-studio && pip install pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e43791c2-5d05-4e0c-926e-7baa95519ae0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO: Pandarallel will run on 10 workers.\n",
      "INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.\n"
     ]
    }
   ],
   "source": [
    "import json, os, time, shutil\n",
    "\n",
    "from cubestudio.request.model_client import Client,init\n",
    "from cubestudio.dataset.dataset import Dataset\n",
    "from pandarallel import pandarallel\n",
    "\n",
    "# Initialization\n",
    "pandarallel.initialize(nb_workers=10)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e690c83-e6e7-4f14-8b8d-0dcc0f666d82",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "初始化验证成功\n"
     ]
    }
   ],
   "source": [
    "# 初始化客户端\n",
    "HOST = 'http://kubeflow-dashboard.infra'\n",
    "token = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJhZG1pbiJ9.j6-hUMaFYdSIzfc6i6TJ5DaS96Z9I78SrjxAOg-71yE'\n",
    "username='admin'\n",
    "init(host=HOST,username=username,token=token)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "91d6b0d6-afcf-4b4f-a16a-6aee76641d64",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 定义一个数据集\n",
    "dataset = Client(Dataset).one(name=\"coco\")\n",
    "if not dataset:\n",
    "    dataset = Client(Dataset).add(name='coco', version='v2014', label='coco未标注数据集', describe='来自于2014年数据，未标注的coco数据集',icon='https://pic2.zhimg.com/80/v2-399df41d8562f8f09b98d288b97c8f8d_1440w.webp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b79da363-a82b-439e-9ea4-176a0d9abf0e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准备上传本地数据 ['coco.zip']\n",
      "数据集上传完成 ['coco.zip']\n"
     ]
    }
   ],
   "source": [
    "# 上传数据集\n",
    "features = json.dumps(json.load(open('vision/coco/data.json')),indent=4,ensure_ascii=False)\n",
    "dataset = dataset.update(path='',features=features)\n",
    "dataset.compress('coco.zip','vision/coco')\n",
    "dataset.upload('coco.zip',partition='20230201')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "43627141-cea4-4e03-a80b-7a624e9685f0",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准备下载数据到 /mnt/admin\n",
      "['http://10.101.140.141/static/dataset/coco/v2014/coco.zip']\n",
      "begin donwload /mnt/admin/coco.zip from http://10.101.140.141/static/dataset/coco/v2014/coco.zip\n",
      "下载数据完成 /mnt/admin\n"
     ]
    }
   ],
   "source": [
    "# 下载数据集\n",
    "os.remove('coco.zip')  if os.path.exists('coco.zip') else ''\n",
    "shutil.rmtree('coco')  if os.path.exists('coco') else ''\n",
    "dataset.download(partition='20230201')\n",
    "dataset.decompress('coco.zip','coco')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0f2cd280-e8db-4b15-b612-596ce7463f2d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 数据集加载\n",
    "dataset.load('coco')\n",
    "table = dataset.table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cb793cdb-2f54-4e41-8779-425f71ed7be9",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "num_columns: 3\n",
      "num_rows: 1225\n",
      "column_names:['id', 'image', 'size']\n",
      "shape: (1225, 3)\n",
      "        \n",
      "{'id': Value(dtype='string', id=None), 'image': Image(decode=True, id=None), 'size': Value(dtype='string', id=None)}\n"
     ]
    }
   ],
   "source": [
    "# 读取基础属性\n",
    "print(table.info)\n",
    "print(table.features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9199763b-3951-49e6-b728-b07b33293b2e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cube-studio",
   "language": "python",
   "name": "cube-studio"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
